Avgpooling

对NHWC格式的输入张量执行2D平均池化,并随后进行范围裁剪(Clip)激活。

该算子融合了两个步骤:

  1. 平均池化 (Average Pooling):

\[\text{Pool}_{i,j} = \frac{1}{k_h \times k_w} \sum_{m=0}^{k_h-1} \sum_{n=0}^{k_w-1} \text{Input}_{i \cdot s_h + m, j \cdot s_w + n}\]
  2. 裁剪激活 (Clipping Activation):

\[\text{Output} = \max(\text{min\_val}, \min(\text{Pool}, \text{max\_val}))\]
输入:
  • input - 输入张量的数据地址。格式: NHWC。

  • params - 其他参数打包成数组。

  • core_mask - 核掩码。

输出:
  • output - 输出张量的数据地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持fp32, int8

  • MT7004 支持fp16, fp32

参数数组结构:

 1long long params[16];
 2params[0] = (long long)in_w;  // 输入特征图的宽度。
 3params[1] = (long long)in_h;  // 输入特征图的高度。
 4params[2] = (long long)win_w;  // 池化核的宽度。
 5params[3] = (long long)win_h;  // 池化核的高度。
 6params[4] = (long long)output_w;  // 输出特征图的宽度。
 7params[5] = (long long)output_h;  // 输出特征图的高度。
 8params[6] = (long long)output_batch;  // 输出特征图的批大小。
 9params[7] = (long long)channel;  // 输出特征图的通道数。
10params[8] = (long long)stride_w;  // 水平方向的步长。
11params[9] = (long long)stride_h;  // 垂直方向的步长。
12params[10] = (long long)pad_l;  // 左边距填充。
13params[11] = (long long)pad_u;  // 上边距填充。
14params[12] = (long long)&minf;  // 裁剪范围的最小值地址。
15params[13] = (long long)&maxf;  // 裁剪范围的最大值地址。

共享存储版本:

void i8_avgpool_fusion_s(int8_t *input, int8_t *output, long long *params, int core_mask)
void fp_avgpool_fusion_s(float *input, float *output, long long *params, int core_mask)
void hp_avgpool_fusion_s(half *input, half *output, long long *params, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <avgpooling.h>
 4int main(int argc, char* argv[]) {
 5     float *input_ptr = (float *)0x81000000;
 6     float *output_ptr = (float *)0x82000000;
 7
 8     int output_batch = 16; //batch数
 9     int channel = 4;
10     int in_w = 16;
11     int in_h = 16;
12
13     int win_w = 2;
14     int win_h = 2;
15     int stride_w = 2;
16     int stride_h = 2;
17     int pad_l = 0;
18     int pad_u = 0;
19     float minf = 0;
20     float maxf = 50;
21
22     //计算output_w和output_h
23     int dividor = in_w + pad_l + 0 - win_w;
24     int output_w = (dividor + stride_w - 1) / stride_w  + 1;
25     int dividor2 = in_h + pad_u + 0 - win_h;
26     int output_h = (dividor2 + stride_h - 1) / stride_h  + 1;
27
28     long long params[16];
29     params[0] = (long long)in_w;
30     params[1] = (long long)in_h;
31     params[2] = (long long)win_w;
32     params[3] = (long long)win_h;
33     params[4] = (long long)output_w;
34     params[5] = (long long)output_h;
35     params[6] = (long long)output_batch;
36     params[7] = (long long)channel;
37     params[8] = (long long)stride_w;
38     params[9] = (long long)stride_h;
39     params[10] = (long long)pad_l;
40     params[11] = (long long)pad_u;
41     params[12] = (long long)&minf; //注意:这里传入的是minf变量的地址(指针),不能把浮点数值直接强制转换成long long
42     params[13] = (long long)&maxf;
43
44     srand(time(NULL));
45
46     //初始化input_ptr
47     int input_size = output_batch * channel * in_w * in_h;
48     int i;
49     for (i = 0; i < input_size; i++) {
50         input_ptr[i] = (float)(rand() % 100);
51     }
52     int core_mask = 0b1111;
53     fp_avgpool_fusion_s(input_ptr, output_ptr, params, core_mask);
54     return 0;
55}

私有存储版本:

void i8_avgpool_fusion_p(int8_t *input, int8_t *output, long long *params)
void fp_avgpool_fusion_p(float *input, float *output, long long *params)
void hp_avgpool_fusion_p(half *input, half *output, long long *params)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <avgpooling.h>
 4int main(int argc, char* argv[]) {
 5     float *input_ptr = (float *)0x10010000;
 6     float *output_ptr = (float *)0x10020000;
 7
 8     int output_batch = 16; //batch数
 9     int channel = 4;
10     int in_w = 16;
11     int in_h = 16;
12
13     int win_w = 2;
14     int win_h = 2;
15     int stride_w = 2;
16     int stride_h = 2;
17     int pad_l = 0;
18     int pad_u = 0;
19     float minf = 0;
20     float maxf = 50;
21
22     //计算output_w和output_h
23     int dividor = in_w + pad_l + 0 - win_w;
24     int output_w = (dividor + stride_w - 1) / stride_w  + 1;
25     int dividor2 = in_h + pad_u + 0 - win_h;
26     int output_h = (dividor2 + stride_h - 1) / stride_h  + 1;
27
28     long long params[16];
29     params[0] = (long long)in_w;
30     params[1] = (long long)in_h;
31     params[2] = (long long)win_w;
32     params[3] = (long long)win_h;
33     params[4] = (long long)output_w;
34     params[5] = (long long)output_h;
35     params[6] = (long long)output_batch;
36     params[7] = (long long)channel;
37     params[8] = (long long)stride_w;
38     params[9] = (long long)stride_h;
39     params[10] = (long long)pad_l;
40     params[11] = (long long)pad_u;
41     params[12] = (long long)&minf; //注意:这里传入的是minf变量的地址(指针),不能把浮点数值直接强制转换成long long
42     params[13] = (long long)&maxf;
43
44     srand(time(NULL));
45
46     //初始化input_ptr
47     int input_size = output_batch * channel * in_w * in_h;
48     int i;
49     for (i = 0; i < input_size; i++) {
50         input_ptr[i] = (float)(rand() % 100);
51     }
52     int core_mask = 0b1111; //私有存储版本的接口不接收core_mask,此变量在本示例中未被使用
53     fp_avgpool_fusion_p(input_ptr, output_ptr, params);
54     return 0;
55}